
	% Build a category crosswalk between the 'ks' and 'vc' corpora.
	%
	% Inputs (read from the current directory):
	%   docu      - tab-separated integer triplets (row, column, value) that
	%               are assembled into the sparse document-by-term matrix X
	%   dict      - one term per line; term k labels column k of X
	%   stoplist  - one term per line; matching dictionary terms are dropped
	%   ksvc.info - two tab-separated text columns per document; the first is
	%               used as the category, the second is stored as 'year'
	%   ks, vc    - only their line counts are used: the first <lines in ks>
	%               documents are tagged 'ks', the next <lines in vc> 'vc'
	%
	% Outputs:
	%   x2015-fixed.csv - ks-category by vc-category score table with labels
	%   xwalk           - one line per category pair: score + top shared terms
	%
	% Requires count_unique and tfidf_rescaling on the path (not defined in
	% this file; count_unique is assumed to return [unique values, counts]).
	%
	% NOTE: the variable 'cat' shadows the builtin cat(); the name is kept so
	% the workspace left behind by this script is unchanged. Do not call the
	% builtin cat() in this script.

	clear;

	% ---- document-term triplets -----------------------------------------
	fid=fopen('docu','r');
	if fid<0, error('Cannot open ''%s'' for reading.','docu'); end
	temp=textscan(fid,'%d %d %d','delimiter','\t');
	fclose(fid);
	R=double(temp{1}); C=double(temp{2}); V=double(temp{3});

	% ---- dictionary and stop list ----------------------------------------
	fid=fopen('dict','r');
	if fid<0, error('Cannot open ''%s'' for reading.','dict'); end
	temp=textscan(fid,'%s','delimiter','\n');
	fclose(fid);
	dict=temp{1};

	fid=fopen('stoplist','r');
	if fid<0, error('Cannot open ''%s'' for reading.','stoplist'); end
	temp=textscan(fid,'%s','delimiter','\n');
	fclose(fid);
	stop=temp{1};

	% ---- per-document metadata -------------------------------------------
	fid=fopen('ksvc.info','r');
	if fid<0, error('Cannot open ''%s'' for reading.','ksvc.info'); end
	temp=textscan(fid,'%s %s','delimiter','\t');
	fclose(fid);
	year=temp{2};
	cat=temp{1};

	% ---- corpus tag per document -----------------------------------------
	% Count newline bytes directly (same number 'wc -l' reports) instead of
	% shelling out: 'wc -l f | cut -d" " -f1' yields an empty string on
	% systems where wc left-pads its output, and needs a POSIX shell.
	corpora={'ks','vc'};
	ksvc=cell(0,1);
	for c=1:numel(corpora)
		fid=fopen(corpora{c},'r');
		if fid<0, error('Cannot open ''%s'' for reading.',corpora{c}); end
		bytes=fread(fid,Inf,'*uint8');
		fclose(fid);
		n_lines=sum(bytes==10);
		ksvc=[ksvc; repmat(corpora(c),n_lines,1)];
	end

	if numel(ksvc)~=numel(cat)
		error('ks+vc contain %d lines but ksvc.info lists %d documents.', ...
			numel(ksvc),numel(cat));
	end

	% ---- assemble X with explicit dimensions -----------------------------
	% sparse(R,C,V) alone sizes the matrix from max(R) and max(C); trailing
	% empty documents or never-used trailing terms would then make the
	% logical masks below longer than the matrix and indexing would fail.
	n_docs=max([R; numel(cat)]);
	n_terms=max([C; numel(dict)]);
	X=sparse(R,C,V,n_docs,n_terms);

	% Keep the unfiltered versions, then drop stop-listed terms.
	X0=X;
	dict0=dict;
	keep=~ismember(dict,stop);
	X=X(:,keep);
	dict=dict(keep);

	% ---- category labels for each corpus ---------------------------------
	% Label format: '<category> (<#docs>): <term> (<% of docs with term>) ...'
	% listing at most the 5 terms present in the most documents.
	XX=X>0;
	cats=cell(1,numel(corpora));
	labels=cell(1,numel(corpora));

	for c=1:numel(corpora)
		focal=corpora{c};
		in_focal=strcmp(ksvc,focal);
		[a,b]=count_unique(cat(in_focal));
		[~,order]=sort(b,'descend');	% largest categories first
		cat_focal=a(order);

		label=cell(1,length(cat_focal));
		for i=1:length(cat_focal)
			keep=in_focal & strcmp(cat,cat_focal{i});
			n_in_cat=sum(keep);
			aggregate=sum(XX(keep,:),1);	% #docs in category containing each term
			[~,idx]=sort(aggregate,'descend');

			lbl=sprintf('%s (%d):',cat_focal{i},n_in_cat);
			for j=1:min(5,numel(idx))	% vocabulary may hold fewer than 5 terms
				base=full(aggregate(idx(j)));
				if base==0, break; end
				lbl=[lbl sprintf(' %s (%.f%%)',dict{idx(j)},base/n_in_cat*100)];
			end
			label{i}=lbl;
		end

		cats{c}=cat_focal;
		labels{c}=label;
	end

	cat_ks=cats{1};	label_ks=labels{1};
	cat_vc=cats{2};	label_vc=labels{2};

	% ---- pairwise category scores ----------------------------------------
	XX=tfidf_rescaling(X);

	n_ks=min(length(cat_ks),1000);
	n_vc=length(cat_vc);

	% Mean rescaled row per category, computed once instead of once per pair.
	mean_ks=cell(1,n_ks);
	for i=1:n_ks
		mean_ks{i}=mean(XX(strcmp(ksvc,'ks') & strcmp(cat,cat_ks{i}),:),1);
	end
	mean_vc=cell(1,n_vc);
	for j=1:n_vc
		mean_vc{j}=mean(XX(strcmp(ksvc,'vc') & strcmp(cat,cat_vc{j}),:),1);
	end

	min_element=Inf;
	max_element=-Inf;

	% Output is collected in cells and joined once at the end.
	json_rows=cell(1,n_ks+1);
	xwalk_parts=cell(1,n_ks*n_vc);
	p=0;

	header=cell(1,n_vc);
	for j=1:n_vc
		header{j}=sprintf(',%s',label_vc{j});
	end
	json_rows{1}=[header{:} sprintf('\n')];

	for i=1:n_ks
		cells=cell(1,n_vc);
		for j=1:n_vc
			ww=mean_ks{i}.*mean_vc{j};	% per-term contribution to the score
			[~,idx]=sort(ww,'descend');

			% Inner product of the two mean vectors (not a Jaccard index).
			score=full(sum(ww));
			max_element=max(max_element,score);
			min_element=min(min_element,score);
			fprintf('%s vs %s : %.10f\n',cat_ks{i},cat_vc{j},score);
			cells{j}=sprintf(',%.10f',score);

			entry=sprintf('(KS) %s / (VC) %s --- %.10f --- ',cat_ks{i},cat_vc{j},score);
			for k=1:min(30,numel(idx))	% up to 30 terms with non-zero contribution
				if full(ww(idx(k)))==0, break; end
				entry=[entry sprintf(' %s',dict{idx(k)})];
			end
			p=p+1;
			xwalk_parts{p}=[entry sprintf('\n')];
		end
		json_rows{i+1}=[label_ks{i} cells{:} sprintf('\n')];
	end

	json=[json_rows{:}];
	xwalk=['' xwalk_parts{:}];

	fprintf('Range: %.10f - %.10f\n',min_element,max_element);

	% ---- write results ---------------------------------------------------
	fid=fopen('x2015-fixed.csv','w');
	if fid<0, error('Cannot open ''%s'' for writing.','x2015-fixed.csv'); end
	fprintf(fid,'%s',json);
	fclose(fid);
	fid=fopen('xwalk','w');
	if fid<0, error('Cannot open ''%s'' for writing.','xwalk'); end
	fprintf(fid,'%s',xwalk);
	fclose(fid);


